Setup
source("scripts/utils.R")
source("scripts/settings.R")
load("data/processed/genome.RData")
load("data/processed/dname.RData")
load("data/processed/accessibility.RData")
load("data/processed/rna_dynamics.RData")
load("data/processed/chip_dynamics.RData")
load("data/processed/cg_density.RData")
load(paste0("data/processed/h3k4me3_data_sperm_", sperm_data, ".RData"))
chip_dfs[["Spermatid"]]$cg.density <- unname(cg_density[allgenes$gene_id])
chip_dfs[["Spermatid"]]$cg.density[is.na(chip_dfs[["Spermatid"]]$cg.density)] <- 0
chip_dfs[["Spermatid"]]$promoter.dna.methyl <- DNAme_dfs[["Spermatid"]]$promoter.enrich
chip_dfs[["Sperm"]]$promoter.dna.methyl <- DNAme_dfs[["Sperm"]]$promoter.enrich
chip_dfs[["Pre-ZGA"]]$promoter.dna.methyl <- DNAme_dfs[["Pre-ZGA"]]$promoter.enrich
chip_dfs[["Pre-ZGA"]]$promoter.accessibility <- accessibility_dfs[["Pre-ZGA"]]$promoter.enrich
chip_dfs[["Spermatid"]]$expression <- expression.data[["Spermatid"]]
chip_dfs[["ZGA"]]$expression.6hpf <- expression.data[["6hpf"]]
chip_dfs[["ZGA"]]$expression.7hpf <- expression.data[["7hpf"]]
chip_dfs[["ZGA"]]$expression.8hpf <- expression.data[["8hpf"]]
chip_dfs[["ZGA"]]$expression.9hpf <- expression.data[["9hpf"]]
chip_dfs[["Spermatid"]]$expression[is.na(chip_dfs[["Spermatid"]]$expression)] <- 0
chip_dfs[["ZGA"]]$expression.6hpf[is.na(chip_dfs[["ZGA"]]$expression.6hpf)] <- 0
chip_dfs[["ZGA"]]$expression.7hpf[is.na(chip_dfs[["ZGA"]]$expression.7hpf)] <- 0
chip_dfs[["ZGA"]]$expression.8hpf[is.na(chip_dfs[["ZGA"]]$expression.8hpf)] <- 0
chip_dfs[["ZGA"]]$expression.9hpf[is.na(chip_dfs[["ZGA"]]$expression.9hpf)] <- 0
Genomic distribution of peaks
gg_color_hue <- function(n) {
hues <- seq(15, 375, length = n + 1)
hcl(h = hues, l = 65, c = 100)[1:n]
}
Xenla10.1_TxDb <- loadDb("data/raw/genome/Xenla10.1.sqlite")
## Loading required package: GenomicFeatures
pre_zga_data <- list("H3K4me3" = chip_peaks[["Pre-ZGA"]], "DNAme" = DNAme_peaks[["Pre-ZGA"]], "Accessibility" = accessibility_peaks[["Pre-ZGA"]])
genodis <- lapply(pre_zga_data, function(x) {
assignChromosomeRegion(x,
nucleotideLevel = FALSE, proximal.promoter.cutoff = c(upstream = 1000, downstream = 1000), precedence = c("Promoters", "immediateDownstream", "fiveUTRs", "threeUTRs", "Exons", "Introns"),
TxDb = Xenla10.1_TxDb
)
})
## Warning in assignChromosomeRegion(x, nucleotideLevel = FALSE,
## proximal.promoter.cutoff = c(upstream = 1000, : peaks.RD has sequence levels
## not in TxDb.
## Warning in assignChromosomeRegion(x, nucleotideLevel = FALSE,
## proximal.promoter.cutoff = c(upstream = 1000, : peaks.RD has sequence levels
## not in TxDb.
b <- matrix(unlist(genodis), nrow = 14)
genodis2 <- b[1:7, ]
colnames(genodis2) <- names(pre_zga_data)
rownames(genodis2) <- c("Promoters", "ImmediateDownstream", "FiveUTRs", "ThreeUTRs", "Exons", "Introns", "Intergenic regions")
ggdf1 <- melt(genodis2)
ggdf1$Var1 <- factor(ggdf1$Var1, levels = rev(rownames(genodis2)))
ggplot(ggdf1, aes(fill = Var1, y = value, x = Var2)) +
geom_bar(position = "stack", stat = "identity") +
theme_bw() +
ylab("%") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1), axis.title.x = element_blank(), legend.title = element_blank()) +
ggtitle("Pre-ZGA") +
scale_fill_manual(values = c("grey", gg_color_hue(6)))

ggsave("output/multiomic/pre_zga_multiomic_genomic_distribution.pdf", width = 3, height = 3)
rm(Xenla10.1_TxDb)
EnrichedHeatmaps
h3k4me3_heatmap <- EnrichedHeatmap(chip_norm_mats[["Pre-ZGA"]],
use_raster = TRUE,
heatmap_legend_param = list(title = expression("H3K4me3 [log"[2]*"]")),
column_title = "H3K4me3",
width = 4,
axis_name = c("-1kb", "TSS", "+1kb"),
col = colorRamp2(c(0, 6), c("white", "#2B539A")),
top_annotation = HeatmapAnnotation(enrich = anno_enriched(axis_param = list(side = "left"), gp = gpar(col = 1)))
)
dname_heatmap <- EnrichedHeatmap(DNAme_norm_mats[["Pre-ZGA"]],
use_raster = TRUE,
heatmap_legend_param = list(title = expression("DNAme [log"[2]*"]")),
column_title = "DNAme",
width = 4,
axis_name = c("-1kb", "TSS", "+1kb"),
col = colorRamp2(c(0, 2), c("white", "#8D396D")),
top_annotation = HeatmapAnnotation(enrich = anno_enriched(gp = gpar(col = 1)))
)
cg_density_heatmap <- Heatmap(log2(unname(cg_density[allgenes$gene_id]+1)),
use_raster = TRUE,
show_row_names = FALSE,
show_row_dend = FALSE,
cluster_rows = FALSE,
width = 1,
column_title = "",
column_title_side = "top",
heatmap_legend_param = list(title = expression("CG density [log"[2]*"]")),
col = colorRamp2(c(0, 0.05), c("white", "black")),
)
hm <- h3k4me3_heatmap + dname_heatmap + cg_density_heatmap
draw(hm)

pdf("output/multiomic/enriched_heatmap_h3k4me3_dname_cg_density.pdf", width = 5, height = 8)
draw(hm)
dev.off()
## quartz_off_screen
## 2
EnrichedHeatmap with accessibility split
set.seed(123)
accessibility_heatmap <- EnrichedHeatmap(accessibility_norm_mats[["Pre-ZGA"]],
use_raster = TRUE,
heatmap_legend_param = list(title = expression("Accessibility [log"[2]*"]")),
column_title = "Accessibility",
width = 4,
axis_name = c("-1kb", "TSS", "+1kb"),
col = colorRamp2(c(0, 5), c("white", "#BB3A36")),
row_km = 2,
top_annotation = HeatmapAnnotation(enrich = anno_enriched(axis_param = list(side = "left"), gp = gpar(col = 1)))
)
h3k4me3_heatmap <- EnrichedHeatmap(chip_norm_mats[["Pre-ZGA"]],
use_raster = TRUE,
heatmap_legend_param = list(title = expression("H3K4me3 [log"[2]*"]")),
column_title = "H3K4me3",
width = 4,
axis_name = c("-1kb", "TSS", "+1kb"),
col = colorRamp2(c(0, 6), c("white", "#2B539A")),
top_annotation = HeatmapAnnotation(enrich = anno_enriched(axis_param = list(side = "right"), gp = gpar(col = 1)))
)
hm <- accessibility_heatmap + h3k4me3_heatmap
draw(hm)

pdf("output/multiomic/enriched_heatmap_h3k4me3_accessibility.pdf", width = 4, height = 6)
draw(hm)
dev.off()
## quartz_off_screen
## 2
Peak plots
for (stage in c("Spermatid", "Sperm", "Pre-ZGA")) {
plot_peak(DNAme_norm_mats, stage = stage, dyns = rna_dyns, cols = cols_rna, remove_NA = TRUE, onlypeaks = FALSE, label = "DNAme")
ggsave(file = paste0("output/multiomic/", stage, "_dname_rna_dynamics_peak.pdf"), width = 5, height = 4)
}
## Using dyn, gene as id variables
## Using dyn, gene as id variables
## Using dyn, gene as id variables
for (stage in c("Spermatid", "Sperm", "Pre-ZGA")) {
plot_peak(DNAme_norm_mats, stage = stage, dyns = rna_dyns_timing, cols = cols_rna_timing, remove_NA = TRUE, onlypeaks = FALSE, label = "DNAme")
ggsave(file = paste0("output/multiomic/", stage, "_dname_rna_timing_dynamics_peak.pdf"), width = 5, height = 4)
}
## Using dyn, gene as id variables
## Using dyn, gene as id variables
## Using dyn, gene as id variables
for (stage in c("Spermatid", "Sperm", "Pre-ZGA")) {
plot_peak(DNAme_norm_mats, stage = stage, dyns = chip_dyns, cols = cols_chip, remove_NA = TRUE, onlypeaks = FALSE, label = "DNAme")
ggsave(file = paste0("output/multiomic/", stage, "_dname_chip_dynamics_peak.pdf"), width = 5, height = 4)
}
## Using dyn, gene as id variables
## Using dyn, gene as id variables
## Using dyn, gene as id variables
plot_peak(accessibility_norm_mats, stage = "Pre-ZGA", dyns = rna_dyns, cols = cols_rna, remove_NA = TRUE, onlypeaks = FALSE, label = "DamID")
## Using dyn, gene as id variables
ggsave(file = paste0("output/multiomic/", stage, "_accessibility_rna_dynamics_peak.pdf"), width = 5, height = 4)
plot_peak(accessibility_norm_mats, stage = "Pre-ZGA", dyns = rna_dyns_timing, cols = cols_rna_timing, remove_NA = TRUE, onlypeaks = FALSE, label = "DamID")
## Using dyn, gene as id variables
ggsave(file = paste0("output/multiomic/", stage, "_accessibility_rna_timing_dynamics_peak.pdf"), width = 5, height = 4)
plot_peak(accessibility_norm_mats, stage = "Pre-ZGA", dyns = chip_dyns, cols = cols_chip, remove_NA = TRUE, onlypeaks = FALSE, label = "DamID")
## Using dyn, gene as id variables
ggsave(file = paste0("output/multiomic/", stage, "_accessibility_chip_dynamics_peak.pdf"), width = 5, height = 4)
Violin / ECDF plots RNA dynamics
g <- plot_stage_dynamics(chip_dfs, dyns = rna_dyns, cols = cols_rna, timepoint = "Pre-ZGA", type = "promoter.accessibility", plot = "violin")
grid.arrange(g)

ggsave(file = paste0("output/multiomic/pre_zga_accessibility_rna_dynamics_violin_plot.pdf"), g, width = 5, height = 4)
for (stage in c("Spermatid", "Sperm", "Pre-ZGA")) {
g <- plot_stage_dynamics(chip_dfs, dyns = rna_dyns, cols = cols_rna, timepoint = stage, type = "promoter.dna.methyl", plot = "violin")
grid.arrange(g)
ggsave(file = paste0("output/multiomic/", stage, "_dname_rna_dynamics_violin.pdf"), g, width = 5, height = 4)
}



g <- plot_stage_dynamics(chip_dfs, dyns = rna_dyns, cols = cols_rna, timepoint = "Spermatid", type = "cg.density", plot = "violin")
grid.arrange(g)

ggsave(file = paste0("output/multiomic/cg_density_rna_dynamics_violin_plot.pdf"), g, width = 5, height = 4)
Violin / ECDF plots RNA timing dynamics
g <- plot_stage_dynamics(chip_dfs, dyns = rna_dyns_timing, cols = cols_rna_timing, timepoint = "Pre-ZGA", type = "promoter.accessibility", plot = "violin")
grid.arrange(g)

ggsave(file = paste0("output/multiomic/pre_zga_accessibility_rna_timing_dynamics_violin_plot.pdf"), g, width = 5, height = 4)
for (stage in c("Spermatid", "Sperm", "Pre-ZGA")) {
g <- plot_stage_dynamics(chip_dfs, dyns = rna_dyns_timing, cols = cols_rna_timing, timepoint = stage, type = "promoter.dna.methyl", plot = "violin")
grid.arrange(g)
ggsave(file = paste0("output/multiomic/", stage, "_dname_rna_timing_dynamics_violin_plot.pdf"), g, width = 5, height = 4)
}



g <- plot_stage_dynamics(chip_dfs, dyns = rna_dyns_timing, cols = cols_rna_timing, timepoint = "Spermatid", type = "cg.density", plot = "violin")
grid.arrange(g)

ggsave(file = paste0("output/multiomic/cg_density_rna_timing_dynamics_violin_plot.pdf"), g, width = 5, height = 4)
Violin / ECDF plots Chip dynamics
g <- plot_stage_dynamics(chip_dfs, dyns = chip_dyns, cols = cols_chip, timepoint = "Pre-ZGA", type = "promoter.accessibility", plot = "violin")
grid.arrange(g)

ggsave(file = paste0("output/multiomic/pre_zga_accessibility_chip_dynamics_violin_plot.pdf"), g, width = 5, height = 4)
for (stage in c("Spermatid", "Sperm", "Pre-ZGA")) {
g <- plot_stage_dynamics(chip_dfs, dyns = chip_dyns, cols = cols_chip, timepoint = stage, type = "promoter.dna.methyl", plot = "violin")
grid.arrange(g)
ggsave(file = paste0("output/multiomic/", stage, "_dname_chip_dynamics_violin_plot.pdf"), g, width = 5, height = 4)
}



g <- plot_stage_dynamics(chip_dfs, dyns = chip_dyns, cols = cols_chip, timepoint = "Spermatid", type = "cg.density", plot = "violin")
grid.arrange(g)

ggsave(file = paste0("output/multiomic/cg_density_chip_dynamics_violin_plot.pdf"), g, width = 5, height = 4)
Heatmap per H3K4me3/RNA dynamic
pure_labels <- c(
"Spermatid.promoter.dna.methyl" = "Spermatid",
"Sperm.promoter.dna.methyl" = "Sperm",
"Pre.ZGA.promoter.dna.methyl" = "Pre-ZGA",
"Pre.ZGA.promoter.accessibility" = "Pre-ZGA",
"ZGA.expression.9hpf" = "9hpf",
"ZGA.expression.8hpf" = "8hpf",
"ZGA.expression.7hpf" = "7hpf",
"ZGA.expression.6hpf" = "6hpf",
"CG.density" = "",
"Spermatid.peak.width" = "Spermatid",
"Sperm.peak.width" = "Sperm",
"Pre.ZGA.peak.width" = "Pre-ZGA",
"ZGA.peak.width" = "ZGA",
"Spermatid.promoter.enrich" = "Spermatid",
"Sperm.promoter.enrich" = "Sperm",
"Pre.ZGA.promoter.enrich" = "Pre-ZGA",
"ZGA.promoter.enrich" = "ZGA",
"Spermatid.expression" = "Spermatid"
)
chip_dyn_map <- melt(chip_dyns)
rownames(chip_dyn_map) <- chip_dyn_map[, 1]
label_dyn <- function(gene, dyns) {
for (i in 1:length(dyns)) {
if (any(gene %in% dyns[[i]])) {
return(names(dyns)[i])
}
}
return(NA)
}
col_fun <- colorRamp2(seq(-2, 2), rev(COL2("RdBu", n = 5)))
intersection_list <- list()
for (chip_dyn in names(chip_dyns)) {
for (rna_dyn in names(rna_dyns)) {
intersection_name <- paste(chip_dyn, rna_dyn, sep = " & ")
intersection_list[[intersection_name]] <- intersect(chip_dyns[[chip_dyn]], rna_dyns[[rna_dyn]])
}
}
heatmap_data <- data.frame(
"Spermatid.peak.width" = chip_dfs[["Spermatid"]][, c("peak.width")],
"Spermatid.promoter.enrich" = chip_dfs[["Spermatid"]][, c("promoter.enrich")],
"Spermatid.promoter.dna.methyl" = chip_dfs[["Spermatid"]][, c("promoter.dna.methyl")],
"Sperm.peak.width" = chip_dfs[["Sperm"]][, c("peak.width")],
"Sperm.promoter.enrich" = chip_dfs[["Sperm"]][, c("promoter.enrich")],
"Sperm.promoter.dna.methyl" = chip_dfs[["Sperm"]][, c("promoter.dna.methyl")],
"Pre-ZGA.peak.width" = chip_dfs[["Pre-ZGA"]][, c("peak.width")],
"Pre-ZGA.promoter.enrich" = chip_dfs[["Pre-ZGA"]][, c("promoter.enrich")],
"Pre-ZGA.promoter.dna.methyl" = chip_dfs[["Pre-ZGA"]][, c("promoter.dna.methyl")],
"Pre-ZGA.promoter.accessibility" = chip_dfs[["Pre-ZGA"]][, c("promoter.accessibility")],
"ZGA.peak.width" = chip_dfs[["ZGA"]][, c("peak.width")],
"ZGA.promoter.enrich" = chip_dfs[["ZGA"]][, c("promoter.enrich")],
"Spermatid.expression" = chip_dfs[["Spermatid"]][, c("expression")],
"ZGA.expression.6hpf" = chip_dfs[["ZGA"]][, c("expression.6hpf")],
"ZGA.expression.7hpf" = chip_dfs[["ZGA"]][, c("expression.7hpf")],
"ZGA.expression.8hpf" = chip_dfs[["ZGA"]][, c("expression.8hpf")],
"ZGA.expression.9hpf" = chip_dfs[["ZGA"]][, c("expression.9hpf")],
"CG.density" = chip_dfs[["Spermatid"]][, c("cg.density")],
"RNA.group" = sapply(allgenes$gene_id, label_dyn, dyns = rna_dyns),
"RNA.group.timing" = sapply(allgenes$gene_id, label_dyn, dyns = rna_dyns_timing),
"Dynamic" = sapply(allgenes$gene_id, label_dyn, dyns = intersection_list),
"Chip.group.detail" = sapply(allgenes$gene_id, label_dyn, dyns = chip_dyns_detail),
"Chip.group" = sapply(allgenes$gene_id, label_dyn, dyns = chip_dyns)
)
# heatmap_data$RNA.group[is.na(heatmap_data$RNA.group)] <- "ND"
# heatmap_data$RNA.group <- factor(heatmap_data$RNA.group, levels = c("GS", "GZ", "ZS", "ND"))
clustering <- FALSE
RNA dynamics heatmap
mean_data <- aggregate(. ~ RNA.group, data = heatmap_data[!colnames(heatmap_data) %in% c("Chip.group", "Dynamic", "RNA.group.timing", "Chip.group.detail")], FUN = mean)
rownames(mean_data) <- mean_data[, "RNA.group"]
mean_data <- mean_data[, -1]
scaled_data <- as.matrix(scale(mean_data))
scaled_data <- t(scaled_data)
rownames(scaled_data) <- pure_labels[rownames(scaled_data)]
hm_rna <- Heatmap(scaled_data,
use_raster = TRUE,
name = "Mean (z-score)",
col = col_fun,
# column_title = "Datasets", row_title = "Genes",
show_row_dend = clustering,
cluster_columns = clustering,
show_row_names = TRUE,
cluster_rows = clustering,
column_order = c("GS", "GZ", "ZS", "ND"),
row_split = c("Peak width", "Promoter mean", "DNA Methylation", "Peak width", "Promoter mean", "DNA Methylation", "Peak width", "Promoter mean", "DNA Methylation", "Accessibility", "Peak width", "Promoter mean", "Expression levels", "Expression levels", "Expression levels", "Expression levels", "Expression levels", "CG density"),
row_title_rot = 0,
)
draw(hm_rna)

pdf("output/multiomic/rna_dynamics_multiomic_heatmap.pdf", width = 8, height = 6)
draw(hm_rna)
dev.off()
## quartz_off_screen
## 2
mean_data <- aggregate(. ~ RNA.group.timing, data = heatmap_data[!colnames(heatmap_data) %in% c("Chip.group", "Dynamic", "Chip.group.detail", "RNA.group")], FUN = mean)
rownames(mean_data) <- mean_data[, "RNA.group.timing"]
mean_data <- mean_data[, -1]
scaled_data <- as.matrix(scale(mean_data))
scaled_data <- t(scaled_data)
rownames(scaled_data) <- pure_labels[rownames(scaled_data)]
hm_rna <- Heatmap(scaled_data,
use_raster = TRUE,
name = "Mean (z-score)",
col = col_fun,
# column_title = "Datasets", row_title = "Genes",
show_row_dend = clustering,
cluster_columns = clustering,
show_row_names = TRUE,
cluster_rows = clustering,
row_split = c("Peak width", "Promoter mean", "DNA Methylation", "Peak width", "Promoter mean", "DNA Methylation", "Peak width", "Promoter mean", "DNA Methylation", "Accessibility", "Peak width", "Promoter mean", "Expression levels", "Expression levels", "Expression levels", "Expression levels", "Expression levels", "CG density"),
row_title_rot = 0,
)
draw(hm_rna)

pdf("output/multiomic/rna_dynamics_timing_multiomic_heatmap.pdf", width = 8, height = 6)
draw(hm_rna)
dev.off()
## quartz_off_screen
## 2
Chip dynamics heatmap
mean_data <- aggregate(. ~ Chip.group, data = heatmap_data[!colnames(heatmap_data) %in% c("RNA.group", "Dynamic", "ZGA.peak.width", "Pre.ZGA.peak.width", "Spermatid.peak.width", "Sperm.peak.width", "ZGA.promoter.enrich", "Pre.ZGA.promoter.enrich", "Spermatid.promoter.enrich", "Sperm.promoter.enrich", "Chip.group.detail", "RNA.group.timing")], FUN = mean)
rownames(mean_data) <- mean_data[, "Chip.group"]
mean_data <- mean_data[, -1]
scaled_data <- as.matrix(scale(mean_data))
scaled_data <- t(scaled_data)
rownames(scaled_data) <- pure_labels[rownames(scaled_data)]
hm_chip <- Heatmap(scaled_data,
use_raster = TRUE,
name = "Mean (z-score)",
col = col_fun,
# column_title = "Datasets", row_title = "Genes",
show_row_dend = clustering,
cluster_columns = clustering,
show_row_names = TRUE,
cluster_rows = clustering,
column_order = c("Lost", "Kept", "Gained", "Absent"),
row_split = c("DNA methylation", "DNA methylation", "DNA methylation", "Accessibility", "Expression levels", "Expression levels", "Expression levels", "Expression levels", "Expression levels", "CG density"),
row_title_rot = 0,
)
draw(hm_chip)

pdf("output/multiomic/chip_dynamics_multiomic_heatmap.pdf", width = 8, height = 6)
draw(hm_chip)
dev.off()
## quartz_off_screen
## 2
mean_data <- aggregate(. ~ Chip.group.detail, data = heatmap_data[!colnames(heatmap_data) %in% c("RNA.group", "Dynamic", "ZGA.peak.width", "Pre.ZGA.peak.width", "Spermatid.peak.width", "Sperm.peak.width", "ZGA.promoter.enrich", "Pre.ZGA.promoter.enrich", "Spermatid.promoter.enrich", "Sperm.promoter.enrich", "Chip.group", "RNA.group.timing")], FUN = mean)
rownames(mean_data) <- mean_data[, "Chip.group.detail"]
mean_data <- mean_data[, -1]
scaled_data <- as.matrix(scale(mean_data))
scaled_data <- t(scaled_data)
rownames(scaled_data) <- pure_labels[rownames(scaled_data)]
hm_chip <- Heatmap(scaled_data,
use_raster = TRUE,
name = "Mean (z-score)",
col = col_fun,
# column_title = "Datasets", row_title = "Genes",
show_row_dend = clustering,
cluster_columns = clustering,
show_row_names = TRUE,
cluster_rows = clustering,
column_order = c("Lost@Sperm", "Lost@Pre-ZGA", "Lost@ZGA", "Kept", "GainedEarly", "GainedLate", "Absent"),
row_split = c("DNA methylation", "DNA methylation", "DNA methylation", "Accessibility", "Expression levels", "Expression levels", "Expression levels", "Expression levels", "Expression levels", "CG density"),
row_title_rot = 0,
)
draw(hm_chip)

pdf("output/multiomic/chip_dynamics_detail_multiomic_heatmap.pdf", width = 8, height = 6)
draw(hm_chip)
dev.off()
## quartz_off_screen
## 2
mean_data <- aggregate(. ~ Dynamic, data = heatmap_data[!colnames(heatmap_data) %in% c("RNA.group", "Chip.group", "RNA.group.timing", "Chip.group.detail")], FUN = mean)
rownames(mean_data) <- mean_data[, "Dynamic"]
mean_data <- mean_data[, -1]
scaled_data <- as.matrix(scale(mean_data))
scaled_data <- t(scaled_data)
rownames(scaled_data) <- pure_labels[rownames(scaled_data)]
hm <- Heatmap(scaled_data,
use_raster = TRUE,
name = "Mean (z-score)",
col = col_fun,
# column_title = "Datasets", row_title = "Genes",
show_row_dend = clustering,
cluster_columns = clustering,
show_row_names = TRUE,
cluster_rows = clustering,
column_order = c("Lost & GS", "Lost & GZ", "Lost & ZS", "Lost & ND",
"Kept & GS", "Kept & GZ", "Kept & ZS", "Kept & ND",
"Gained & GZ", "Gained & ZS", "Gained & ND",
"Absent & GS", "Absent & GZ", "Absent & ZS", "Absent & ND"),
row_split = c("Peak width", "Promoter mean", "DNA Methylation", "Peak width", "Promoter mean", "DNA Methylation", "Peak width", "Promoter mean", "DNA Methylation", "Accessibility", "Peak width", "Promoter mean", "Expression levels", "Expression levels", "Expression levels", "Expression levels", "Expression levels", "CG density"),
row_title_rot = 0,
)
draw(hm)

pdf("output/multiomic/rna_chip_intersection_dynamics_multiomic_heatmap.pdf", width = 8, height = 6)
draw(hm)
dev.off()
## quartz_off_screen
## 2
Correlation plot
cor_labels <- c(
"Spermatid.promoter.dna.methyl" = "Spermatid DNAme",
"Sperm.promoter.dna.methyl" = "Sperm DNAme",
"Pre.ZGA.promoter.dna.methyl" = "Pre-ZGA DNAme",
"Pre.ZGA.promoter.accessibility" = "Pre-ZGA Accessibility",
"ZGA.expression.9hpf" = "9hpf RNA expression",
"ZGA.expression.8hpf" = "8hpf RNA expression",
"ZGA.expression.7hpf" = "7hpf RNA expression",
"ZGA.expression.6hpf" = "6hpf RNA expression",
"CG.density" = "CG density",
"Spermatid.peak.width" = "Spermatid peak width",
"Sperm.peak.width" = "Sperm peak width",
"Pre.ZGA.peak.width" = "Pre-ZGA peak width",
"ZGA.peak.width" = "ZGA peak width",
"Spermatid.promoter.enrich" = "Spermatid promoter mean",
"Sperm.promoter.enrich" = "Sperm promoter mean",
"Pre.ZGA.promoter.enrich" = "Pre-ZGA promoter mean",
"ZGA.promoter.enrich" = "ZGA promoter mean",
"Spermatid.expression" = "Spermatid RNA expression"
)
df <- heatmap_data[, !colnames(heatmap_data) %in% c("RNA.group", "Chip.group", "Chip.group.detail", "RNA.group.timing", "Dynamic")]
df <- na.omit(df)
colnames(df) <- cor_labels[colnames(df)]
plot_corrplot <- function() {
corrplot(cor(df, method = "pearson"),
method = "color",
order = "FPC",
col = rev(COL2("RdBu", 200)),
tl.col = "black"
)
}
plot_corrplot()

pdf("output/multiomic/corr_plot.pdf", width = 10, height = 12)
plot_corrplot()
dev.off()
## quartz_off_screen
## 2
PCA plot
only_prezga <- TRUE
complete_data <- heatmap_data
if (only_prezga) {
pca_input <- complete_data[, colnames(complete_data) %in% c("Pre.ZGA.promoter.enrich", "Pre.ZGA.peak.width", "Pre.ZGA.promoter.dna.methyl", "Pre.ZGA.promoter.accessibility")]
} else {
pca_input <- complete_data[, !colnames(complete_data) %in% c("RNA.group", "Chip.group")]
}
df <- na.omit(pca_input)
color.groups <- complete_data[!rownames(pca_input) %in% names(na.action(df)), "Chip.group"]
cols <- cols_chip
# color.groups = complete_data[!rownames(pca_input) %in% names(na.action(df)),"RNA.group"]
# cols = cols_rna
data.pca <- princomp(scale(df), cor = TRUE)
# summary(data.pca)
fviz_eig(data.pca, addlabels = TRUE)

fviz_contrib(data.pca, choice = "var", axes = 1:2)

fviz_pca_var(data.pca,
col.var = "contrib",
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE
)

fviz_pca_ind(data.pca,
col.ind = color.groups,
palette = cols,
label = "none",
pointsize = 0.5,
addEllipses = TRUE, # Concentration ellipses
ellipse.type = "norm",
legend.title = "Dynamics"
)
## Warning: Removed 2766 rows containing missing values (`geom_point()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).

Unsupervised clustering heatmap
genes <- allgenes$gene_id
modalities <- c("promoter.enrich" = "H3K4me3", "peak.width" = "H3K4me3 peak width", "promoter.accessibility" = "Accessibility", "cg.density" = "CG density", "promoter.dna.methyl" = "DNA methylation")
hm_data <- cbind(chip_dfs[["Pre-ZGA"]][, c("promoter.enrich", "promoter.dna.methyl", "promoter.accessibility")], chip_dfs[["Spermatid"]]["cg.density"])
# heatmap_data <- heatmap_data[sample(1:nrow(heatmap_data), 1000, replace=FALSE),]
colnames(hm_data) <- modalities[colnames(hm_data)]
scaled_data <- as.matrix(scale(hm_data))
set.seed(12)
kclus <- kmeans(scaled_data, 6)
cluster_order <- c(4, 3, 6, 2, 1, 5)
split <- factor(cluster_order[kclus$cluster]) # levels=cluster_order)
col_fun <- colorRamp2(seq(-2, 2), rev(COL2("RdBu", n = 5)))
hm1 <- Heatmap(t(scaled_data),
use_raster = TRUE,
name = "z-score",
col = col_fun,
# column_title = "Datasets", row_title = "Genes",
show_row_dend = FALSE,
cluster_columns = FALSE,
show_row_names = TRUE,
row_order = c("H3K4me3", "CG density", "DNA methylation", "Accessibility"),
show_column_names = FALSE,
# column_order = c("promoter.enrich", "peak.width", "promoter.dna.methyl", "promoter.accessibility"),
column_split = split,
cluster_row_slices = FALSE
)
draw(hm1)

pdf("output/multiomic/unsupervised_clustering_heatmap.pdf", width = 12, height = 2)
draw(hm1)
dev.off()
## quartz_off_screen
## 2